{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "from operator import itemgetter\n",
    "\n",
    "\n",
    "# Upper bound on how many of the most similar users predict() takes into account.\n",
    "K=20"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Toy rating records, one [user, item, rating] triple per row.\n",
    "# Not every user has rated every item (e.g. A has no rating for 'e', D none for 'd').\n",
    "data=[\n",
    "    ['A','a',5],\n",
    "    ['A','b',3],\n",
    "    ['A','c',4],\n",
    "    ['A','d',4],\n",
    "    ['B','a',3],\n",
    "    ['B','b',1],\n",
    "    ['B','c',2],\n",
    "    ['B','d',2],\n",
    "    ['B','e',2],\n",
    "    ['C','a',4],\n",
    "    ['C','b',4],\n",
    "    ['C','c',4],\n",
    "    ['C','d',4],\n",
    "    ['C','e',4],\n",
    "    ['D','a',3],\n",
    "    ['D','b',2],\n",
    "    ['D','c',2],\n",
    "    ['D','e',3],\n",
    "]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A': {'a': 5, 'b': 3, 'c': 4, 'd': 4},\n",
       " 'B': {'a': 3, 'b': 1, 'c': 2, 'd': 2, 'e': 2},\n",
       " 'C': {'a': 4, 'b': 4, 'c': 4, 'd': 4, 'e': 4},\n",
       " 'D': {'a': 3, 'b': 2, 'c': 2, 'e': 3}}"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Re-shape the flat rating triples into a nested mapping: user -> {item: rating}.\n",
    "train_data = {}\n",
    "\n",
    "for user, item, record in data:\n",
    "    # setdefault creates the per-user dict on first sight and returns it either way.\n",
    "    train_data.setdefault(user, {})[item] = record\n",
    "\n",
    "train_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A': 4.0, 'B': 2.0, 'C': 4.0, 'D': 2.5}"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mean rating of every user, taken over all items that user has rated.\n",
    "average_rating = {}\n",
    "for u, items in train_data.items():\n",
    "    rated_count = len(items)\n",
    "    # Each rating is divided before summing, in item order.\n",
    "    average_rating[u] = sum(rating / rated_count for rating in items.values())\n",
    "average_rating"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'a': {'A', 'B', 'C', 'D'},\n",
       " 'b': {'A', 'B', 'C', 'D'},\n",
       " 'c': {'A', 'B', 'C', 'D'},\n",
       " 'd': {'A', 'B', 'C'},\n",
       " 'e': {'B', 'C', 'D'}}"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the inverted index: item -> set of users who rated that item.\n",
    "item_users = {}\n",
    "for u, items in train_data.items():\n",
    "    for i in items:\n",
    "        # Create the user set the first time an item is seen, then record the rater.\n",
    "        item_users.setdefault(i, set()).add(u)\n",
    "item_users"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cosine similarity\n",
    "def calCosineSimi():\n",
    "    \"\"\"Compute the cosine similarity between every pair of users sharing an item.\n",
    "\n",
    "    Reads the module-level ``item_users`` (item -> users) and ``train_data``\n",
    "    (user -> {item: rating}). The dot product and both squared norms are\n",
    "    accumulated only over the items that both users of a pair have rated.\n",
    "\n",
    "    Returns:\n",
    "        dict: ``{u: {v: similarity}}`` for each ordered pair of distinct users\n",
    "        with at least one item in common. The value is the int ``0`` when the\n",
    "        dot product is zero.\n",
    "    \"\"\"\n",
    "    dot = {}        # dot[u][v]:  sum of r_u * r_v over shared items\n",
    "    norm_sq_u = {}  # norm_sq_u[u][v]: sum of r_u ** 2 over shared items\n",
    "    norm_sq_v = {}  # norm_sq_v[u][v]: sum of r_v ** 2 over shared items\n",
    "\n",
    "    for item, raters in item_users.items():\n",
    "        for u in raters:\n",
    "            for v in raters:\n",
    "                if u != v:\n",
    "                    r_u = train_data[u][item]\n",
    "                    r_v = train_data[v][item]\n",
    "\n",
    "                    dot_row = dot.setdefault(u, {})\n",
    "                    dot_row[v] = dot_row.get(v, 0) + r_u * r_v\n",
    "\n",
    "                    u_row = norm_sq_u.setdefault(u, {})\n",
    "                    u_row[v] = u_row.get(v, 0) + r_u * r_u\n",
    "\n",
    "                    v_row = norm_sq_v.setdefault(u, {})\n",
    "                    v_row[v] = v_row.get(v, 0) + r_v * r_v\n",
    "\n",
    "    # Turn the accumulated sums into the final user-user similarity table.\n",
    "    user_sim = {}\n",
    "    for u, dot_row in dot.items():\n",
    "        sims = user_sim[u] = {}\n",
    "        for v, numerator in dot_row.items():\n",
    "            if numerator == 0:\n",
    "                sims[v] = 0\n",
    "            else:\n",
    "                sims[v] = numerator / math.sqrt(norm_sq_u[u][v] * norm_sq_v[u][v])\n",
    "    return user_sim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pearson similarity\n",
    "def calPearsonSimi():\n",
    "    \"\"\"Compute a Pearson-style similarity between every pair of users sharing an item.\n",
    "\n",
    "    Reads the module-level ``item_users``, ``train_data`` and ``average_rating``.\n",
    "    Each rating is centred on the value stored for its user in\n",
    "    ``average_rating``; the products of these deviations are accumulated only\n",
    "    over the items that both users of a pair have rated.\n",
    "\n",
    "    Returns:\n",
    "        dict: ``{u: {v: similarity}}`` for each ordered pair of distinct users\n",
    "        with at least one item in common. The value is the int ``0`` when the\n",
    "        accumulated covariance term is zero.\n",
    "    \"\"\"\n",
    "    covariance = {}  # covariance[u][v]: sum of dev_u * dev_v over shared items\n",
    "    spread_u = {}    # spread_u[u][v]:   sum of dev_u ** 2 over shared items\n",
    "    spread_v = {}    # spread_v[u][v]:   sum of dev_v ** 2 over shared items\n",
    "\n",
    "    for item, raters in item_users.items():\n",
    "        for u in raters:\n",
    "            for v in raters:\n",
    "                if u != v:\n",
    "                    dev_u = train_data[u][item] - average_rating[u]\n",
    "                    dev_v = train_data[v][item] - average_rating[v]\n",
    "\n",
    "                    cov_row = covariance.setdefault(u, {})\n",
    "                    cov_row[v] = cov_row.get(v, 0) + dev_u * dev_v\n",
    "\n",
    "                    u_row = spread_u.setdefault(u, {})\n",
    "                    u_row[v] = u_row.get(v, 0) + dev_u * dev_u\n",
    "\n",
    "                    v_row = spread_v.setdefault(u, {})\n",
    "                    v_row[v] = v_row.get(v, 0) + dev_v * dev_v\n",
    "\n",
    "    # Turn the accumulated sums into the final user-user similarity table.\n",
    "    user_sim = {}\n",
    "    for u, cov_row in covariance.items():\n",
    "        sims = user_sim[u] = {}\n",
    "        for v, numerator in cov_row.items():\n",
    "            if numerator == 0:\n",
    "                sims[v] = 0\n",
    "            else:\n",
    "                sims[v] = numerator / math.sqrt(spread_u[u][v] * spread_v[u][v])\n",
    "    return user_sim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict(user_sim, user, item, k=None):\n",
    "    \"\"\"Predict the rating ``user`` would give ``item`` (mean-centred user-based CF).\n",
    "\n",
    "    The ``k`` users most similar to ``user`` are taken in descending order of\n",
    "    similarity. Those among them who rated ``item`` contribute their deviation\n",
    "    from their own mean rating, weighted by similarity; the weighted average\n",
    "    deviation is added to ``user``'s own mean rating.\n",
    "\n",
    "    Reads the module-level ``train_data``, ``average_rating`` and ``K``.\n",
    "\n",
    "    Args:\n",
    "        user_sim: ``{u: {v: similarity}}`` table, e.g. from calCosineSimi()\n",
    "            or calPearsonSimi().\n",
    "        user: user to predict for; must be a key of ``average_rating``.\n",
    "        item: item whose rating is predicted.\n",
    "        k: neighbourhood size; ``None`` (the default) means the module-level ``K``.\n",
    "\n",
    "    Returns:\n",
    "        The predicted rating, or ``0`` when no neighbour with non-zero\n",
    "        similarity has rated ``item`` (nothing to base a prediction on).\n",
    "\n",
    "    Raises:\n",
    "        KeyError: if ``user`` has no entry in ``average_rating``.\n",
    "    \"\"\"\n",
    "    rui = average_rating[user]\n",
    "    neighbourhood_size = K if k is None else k\n",
    "\n",
    "    # A user who shares no item with anybody has no row in user_sim at all.\n",
    "    neighbours = sorted(user_sim.get(user, {}).items(),\n",
    "                        key=itemgetter(1), reverse=True)[:neighbourhood_size]\n",
    "\n",
    "    weighted_deviation = 0  # numerator\n",
    "    weight_total = 0        # denominator\n",
    "    for similar_user, similarity_factor in neighbours:\n",
    "        neighbour_ratings = train_data[similar_user]\n",
    "        if item not in neighbour_ratings:\n",
    "            continue\n",
    "        weighted_deviation += similarity_factor * (\n",
    "            neighbour_ratings[item] - average_rating[similar_user])\n",
    "        weight_total += math.fabs(similarity_factor)\n",
    "\n",
    "    # Guard on the denominator, not the numerator: neighbours who rated the item\n",
    "    # exactly at their own mean give a zero numerator, which is a valid signal\n",
    "    # that the prediction equals the user's mean - it must not collapse to 0.\n",
    "    if weight_total == 0:\n",
    "        return 0\n",
    "    return rui + weighted_deviation / weight_total"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A': {'D': 0.9946917938265513,\n",
       "  'C': 0.9847319278346618,\n",
       "  'B': 0.9864400504156211},\n",
       " 'D': {'A': 0.9946917938265513,\n",
       "  'C': 0.9805806756909202,\n",
       "  'B': 0.9707253433941508},\n",
       " 'C': {'A': 0.9847319278346618,\n",
       "  'D': 0.9805806756909202,\n",
       "  'B': 0.9534625892455924},\n",
       " 'B': {'A': 0.9864400504156211,\n",
       "  'D': 0.9707253433941508,\n",
       "  'C': 0.9534625892455924}}"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Cosine similarity: build the user-user similarity table and display it\n",
    "user_sim1=calCosineSimi()\n",
    "user_sim1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A': {'D': 0.8164965809277261, 'C': 0, 'B': 1.0},\n",
       " 'D': {'A': 0.8164965809277261, 'C': 0, 'B': 0.7071067811865475},\n",
       " 'C': {'A': 0, 'D': 0, 'B': 0},\n",
       " 'B': {'A': 1.0, 'D': 0.7071067811865475, 'C': 0}}"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pearson similarity: build the user-user similarity table and display it\n",
    "user_sim2=calPearsonSimi()\n",
    "user_sim2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4.167690067762287"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Predict user A's rating for item e using the cosine similarities\n",
    "predict(user_sim1,'A','e')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4.224744871391589"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Predict user A's rating for item e using the Pearson similarities\n",
    "predict(user_sim2,'A','e')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
